This project investigates Symbiodinium communities in scleractinian and milleporine corals sampled at various sites on the north and south shores of St. John, US Virgin Islands in August 2012. The ITS2 region of nrDNA isolated from coral samples was amplified and sequenced by paired-end 300 bp reads on an Illumina MiSeq platform. Sequence data is processed through a custom bioinformatic pipeline and resulting OTU tables are statistically analyzed to ask questions regarding diversity and community structure of Symbiodinium within and across coral species and between shores. The major aims of this work are to:
Sampling locations in St. John
A custom pipeline is used here to process ITS2 sequence data. Briefly, paired reads were merged using illumina-utils and filtered to keep only those with 3 mismatches or less. Chimeras were then removed by usearch, and singletons were removed. From here, sequence data was processed using each of three approaches:
OTU tables and taxonomic assignments from each of these bioinformatic approaches are used in comparative downstream analyses.
The phyloseq package is used here to combine the OTU table, taxonomic assignments, and sample metadata into a single R data object (class ‘phyloseq’) to facilitate downstream analyses.
# Import taxonomic assignment data from nw
read.nw <- function(file) {
nw <- read.table(file, stringsAsFactors=FALSE)
nw <- nw[order(nw$otu),]
return(nw)
}
tax97 <- read.nw("data/otus_97/nw_tophits.tsv")
tax100 <- read.nw("data/otus_100/nw_tophits.tsv")
tax97bs <- read.nw("data/otus_97_bysample/nw_tophits.tsv")
tax97bsp <- read.nw("data/otus_97_byspecies/nw_tophits.tsv")
# Deal with identical taxonomic assignments (because some reference sequences are not unique...)
# Create names for groups of identical sequences based on members of group...
ident <- readLines("data/ITS2db_trimmed_notuniques_otus.txt")
ident <- gsub("denovo[0-9]*\t", "", ident)
ident <- strsplit(ident, split="\t")
ident2 <- lapply(ident, str_match_all, pattern="[A-I]{1}[0-9]{1,3}.*[_/]")
ident2 <- lapply(ident2, function(g) gsub(pattern="\\..*_$", "", x=unlist(g)))
ident2 <- lapply(ident2, function(g) gsub(pattern="_$", "", g))
ident2 <- lapply(ident2, function(g) unlist(strsplit(g, split="/")))
subtypes <- lapply(ident2, function(x) levels(factor(unlist(x))))
subtypes <- lapply(subtypes, function(s) paste(paste(s, collapse="/"), "_multiple", sep=""))
names(ident) <- subtypes
ident <- melt(do.call(rbind, ident))
## Warning in (function (..., deparse.level = 1) : number of columns of result
## is not a multiple of vector length (arg 6)
ident <- unique(ident[order(ident[,1]), c(3,1)])
# Replace any sequence name in taxonomy assignment that is a member of a group of identical sequences with the name of the group
collapse.idents <- function(df) {
within(df, {
for (i in 1:nrow(ident)) {
hit <- gsub(ident[i,1], ident[i,2], hit)
}
})
}
tax97 <- collapse.idents(tax97)
tax97bs <- collapse.idents(tax97bs)
tax97bsp <- collapse.idents(tax97bsp)
tax100 <- collapse.idents(tax100)
#tax97 <- get.st(tax97)
tax97 <- with(tax97, tax97[order(otu, -sim), ])
tax97 <- tax97[!duplicated(tax97$otu), ]
rownames(tax97) <- tax97$otu
#tax97bs <- get.st(tax97bs)
tax97bs <- with(tax97bs, tax97bs[order(otu, -sim), ])
tax97bs <- tax97bs[!duplicated(tax97bs$otu), ]
rownames(tax97bs) <- tax97bs$otu
#tax97bsp <- get.st(tax97bsp)
tax97bsp <- with(tax97bsp, tax97bsp[order(otu, -sim), ])
tax97bsp <- tax97bsp[!duplicated(tax97bsp$otu), ]
rownames(tax97bsp) <- tax97bsp$otu
#tax100 <- get.st(tax100)
tax100 <- with(tax100, tax100[order(otu, -sim), ])
tax100 <- tax100[!duplicated(tax100$otu), ]
rownames(tax100) <- tax100$otu
# Import OTU tables
otu97 <- otu_table(read.table("data/otus_97/97_otus.tsv", header=T, check.names=F, row.names=1,
skip=1, sep="\t", comment.char=""), taxa_are_rows=T)
otu97bs <- otu_table(read.table("data/otus_97_bysample/97_otus_bysample.tsv", header=T, check.names=F, row.names=1,
skip=1, sep="\t", comment.char=""), taxa_are_rows=T)
otu97bsp <- otu_table(read.table("data/otus_97_byspecies/97_otus_byspecies.tsv", header=T, check.names=F, row.names=1,
skip=1, sep="\t", comment.char=""), taxa_are_rows=T)
otu100 <- otu_table(read.table("data/otus_100/100_otus.tsv", header=T, check.names=F, row.names=1,
skip=1, sep="\t", comment.char=""), taxa_are_rows=T)
# Import sample data
sam <- sample_data(read.delim("data/mapping_file.txt", sep="\t", header=T, row.names=1))
# Build phyloseq objects
phy97 <- phyloseq(otu97, tax_table(as.matrix(tax97)), sam)
phy97bs <- phyloseq(otu97bs, tax_table(as.matrix(tax97bs)), sam)
phy97bsp <- phyloseq(otu97bsp, tax_table(as.matrix(tax97bsp)), sam)
phy100 <- phyloseq(otu100, tax_table(as.matrix(tax100)), sam)
# FILTER OUT OTUS THAT ARE NOT SYMBIODINIUM
# If the top hit in NCBI does not contain the string "Symbiodinium", then this sequence is assumed to not be Symbiodinium.
poormatch97 <- readLines("data/otus_97/poormatch_IDs.txt")
poormatch97 <- data.frame(otu=str_extract(poormatch97, "denovo[^ ]*"),
symbio=str_detect(poormatch97, "Symbiodinium"), # TRUE if Symbiodinium
stringsAsFactors = FALSE)
notsymbio97 <- poormatch97[which(poormatch97$symbio==F), 1]
poormatch97bs <- readLines("data/otus_97_bysample/poormatch_IDs.txt")
poormatch97bs <- data.frame(otu=str_extract(poormatch97bs, "denovo[^ ]*"),
symbio=str_detect(poormatch97bs, "Symbiodinium"), # TRUE if Symbiodinium
stringsAsFactors = FALSE)
notsymbio97bs <- poormatch97bs[which(poormatch97bs$symbio==F), 1]
poormatch97bsp <- readLines("data/otus_97_byspecies/poormatch_IDs.txt")
poormatch97bsp <- data.frame(otu=str_extract(poormatch97bsp, "denovo[^ ]*"),
symbio=str_detect(poormatch97bsp, "Symbiodinium"), # TRUE if Symbiodinium
stringsAsFactors = FALSE)
notsymbio97bsp <- poormatch97bsp[which(poormatch97bsp$symbio==F), 1]
poormatch100 <- readLines("data/otus_100/poormatch_IDs.txt")
poormatch100 <- data.frame(otu=str_extract(poormatch100, "denovo[^ ]*"),
symbio=str_detect(poormatch100, "Symbiodinium"), # TRUE if Symbiodinium
stringsAsFactors = FALSE)
notsymbio100 <- poormatch100[which(poormatch100$symbio==F), 1]
# Remove OTUs that are not Symbiodinium
phy97.f <- prune_taxa(!taxa_names(phy97) %in% notsymbio97, phy97)
phy97bs.f <- prune_taxa(!taxa_names(phy97bs) %in% notsymbio97bs, phy97bs)
phy97bsp.f <- prune_taxa(!taxa_names(phy97bsp) %in% notsymbio97bsp, phy97bsp)
phy100.f <- prune_taxa(!taxa_names(phy100) %in% notsymbio100, phy100)
# Filter OTUs by minimum count
# Set threshold count
n <- 10
# Identify OTUs below threshold count
taxa97 <- taxa_sums(phy97.f)[which(taxa_sums(phy97.f) >= n)]
taxa97bs <- taxa_sums(phy97bs.f)[which(taxa_sums(phy97bs.f) >= n)]
taxa97bsp <- taxa_sums(phy97bsp.f)[which(taxa_sums(phy97bsp.f) >= n)]
taxa100 <- taxa_sums(phy100.f)[which(taxa_sums(phy100.f) >= n)]
# Remove taxa below threshold count
phy97.f <- prune_taxa(names(taxa97), phy97.f)
phy97bs.f <- prune_taxa(names(taxa97bs), phy97bs.f)
phy97bsp.f <- prune_taxa(names(taxa97bsp), phy97bsp.f)
phy100.f <- prune_taxa(names(taxa100), phy100.f)
# Filter samples by minimum count
# Set threshold number of reads
sn <- 200
# Remove samples with fewer reads than threshold
phy97.f <- prune_samples(sample_sums(phy97.f)>=sn, phy97.f)
phy97bs.f <- prune_samples(sample_sums(phy97bs.f)>=sn, phy97bs.f)
phy97bsp.f <- prune_samples(sample_sums(phy97bsp.f)>=sn, phy97bsp.f)
phy100.f <- prune_samples(sample_sums(phy100.f)>=sn, phy100.f)
# Filter OTUs by minimum count again in case any dropped below threshold after filtering samples
# Identify OTUs below threshold count
taxa97 <- taxa_sums(phy97.f)[which(taxa_sums(phy97.f) >= n)]
taxa97bs <- taxa_sums(phy97bs.f)[which(taxa_sums(phy97bs.f) >= n)]
taxa97bsp <- taxa_sums(phy97bsp.f)[which(taxa_sums(phy97bsp.f) >= n)]
taxa100 <- taxa_sums(phy100.f)[which(taxa_sums(phy100.f) >= n)]
# Remove taxa below threshold count
phy97.f <- prune_taxa(names(taxa97), phy97.f)
phy97bs.f <- prune_taxa(names(taxa97bs), phy97bs.f)
phy97bsp.f <- prune_taxa(names(taxa97bsp), phy97bsp.f)
phy100.f <- prune_taxa(names(taxa100), phy100.f)
# Label clades and subtypes for filtered phyloseq object tax_tables
get.st <- function(df) {
within(df, {
Clade <- substr(hit, 1, 1)
Subtype <- gsub(hit, pattern="_[A-Z]{2}[0-9]{6}", replacement="")
Subtype <- gsub(Subtype, pattern="_multiple", replacement="")
Subtype2 <- ifelse(as.numeric(sim)==100, paste0("'", Subtype, "'"),
paste0("'[", rep(rle(sort(Subtype))$values, times=rle(sort(Subtype))$lengths), "]'^",
unlist(lapply(rle(sort(Subtype))$lengths, seq_len)))[order(order(Subtype))])
#Subtype <- ifelse(as.numeric(sim)==100, Subtype, paste("*", Subtype, sep=""))
})
}
tax_table(phy97.f) <- as.matrix(get.st(data.frame(tax_table(phy97.f), stringsAsFactors=FALSE)))
tax_table(phy97bs.f) <- as.matrix(get.st(data.frame(tax_table(phy97bs.f), stringsAsFactors=FALSE)))
tax_table(phy97bsp.f) <- as.matrix(get.st(data.frame(tax_table(phy97bsp.f), stringsAsFactors=FALSE)))
tax_table(phy100.f) <- as.matrix(get.st(data.frame(tax_table(phy100.f), stringsAsFactors=FALSE)))
# Compute summary statistics
stats97.f <- data.frame(`97% OTUs`=t(phystats(phy97.f)), check.names=F)
stats97bs.f <- data.frame(`97% within-sample OTUs`=t(phystats(phy97bs.f)), check.names=F)
stats97bsp.f <- data.frame(`97% within-species OTUs`=t(phystats(phy97bsp.f)), check.names=F)
stats100.f <- data.frame(`100% OTUs`=t(phystats(phy100.f)), check.names=F)
# Create and plot histograms
taxhist97 <- hist(log10(taxa_sums(phy97.f)), plot=F)
taxhist97bs <- hist(log10(taxa_sums(phy97bs.f)), plot=F)
taxhist97bsp <- hist(log10(taxa_sums(phy97bsp.f)), plot=F)
taxhist100 <- hist(log10(taxa_sums(phy100.f)), plot=F)
samhist97 <- hist(log10(sample_sums(phy97.f)), plot=F)
samhist97bs <- hist(log10(sample_sums(phy97bs.f)), plot=F)
samhist97bsp <- hist(log10(sample_sums(phy97bsp.f)), plot=F)
samhist100 <- hist(log10(sample_sums(phy100.f)), plot=F)
par(mfrow=c(2, 4), mar=c(3,3,1,1))
plot(taxhist97, col="black", main="97% OTU counts", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(taxhist97bs, col="black", main="97% sample OTU counts", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(taxhist97bsp, col="black", main="97% species OTU counts", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(taxhist100, col="black", main="100% OTU counts", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(samhist97, col="black", main="97% OTU reads per sample", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)
plot(samhist97bs, col="black", main="97% sample OTU reads per sample", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)
plot(samhist97bsp, col="black", main="97% species OTU reads per sample", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)
plot(samhist100, col="black", main="100% OTU reads per sample", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)
# Create stats table
knitr::kable(cbind(stats97.f, stats97bs.f, stats97bsp.f, stats100.f))
| 97% OTUs | 97% within-sample OTUs | 97% within-species OTUs | 100% OTUs | |
|---|---|---|---|---|
| Total count in OTU table | 1489768 | 1489657 | 1489781 | 943643 |
| Number of OTUs | 94 | 106 | 86 | 4718 |
| Range of OTU counts | 10 - 742671 | 10 - 472752 | 10 - 566295 | 10 - 171212 |
| Number of singleton OTUs | 0 | 0 | 0 | 0 |
| Number of samples | 80 | 80 | 80 | 80 |
| Range of reads per sample | 706 - 169884 | 707 - 169890 | 707 - 169893 | 485 - 97003 |
| Arithmetic mean (±SD) reads per sample | 18622 ± 21102 | 18620 ± 21103 | 18622 ± 21103 | 11795 ± 12744 |
| Geometric mean (±SD) reads per sample | 13141 ± 2 | 13137 ± 2 | 13141 ± 2 | 8141 ± 2 |
# Compute summary statistics
stats97 <- data.frame(`97% OTUs`=t(phystats(phy97)), check.names=F)
stats97bs <- data.frame(`97% within-sample OTUs`=t(phystats(phy97bs)), check.names=F)
stats97bsp <- data.frame(`97% within-species OTUs`=t(phystats(phy97bsp)), check.names=F)
stats100 <- data.frame(`100% OTUs`=t(phystats(phy100)), check.names=F)
# Create and plot histograms
taxhist97 <- hist(log10(taxa_sums(phy97)), plot=F)
samhist97 <- hist(log10(sample_sums(phy97)), plot=F)
taxhist97bs <- hist(log10(taxa_sums(phy97bs)), plot=F)
samhist97bs <- hist(log10(sample_sums(phy97bs)), plot=F)
taxhist97bsp <- hist(log10(taxa_sums(phy97bsp)), plot=F)
samhist97bsp <- hist(log10(sample_sums(phy97bsp)), plot=F)
taxhist100 <- hist(log10(taxa_sums(phy100)), plot=F)
samhist100 <- hist(log10(sample_sums(phy100)), plot=F)
par(mfrow=c(2, 4), mar=c(3,3,1,1))
plot(taxhist97, col="black", main="97% OTU counts", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(taxhist97bs, col="black", main="97% within-sample OTU counts", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(taxhist97bsp, col="black", main="97% within-species OTU counts", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(taxhist100, col="black", main="100% OTU counts", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. OTUs", cex.lab=0.75, cex.axis=0.75)
plot(samhist97, col="black", main="97% OTU reads per sample", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)
plot(samhist97bs, col="black", main="97% within-sample OTU reads per sample", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)
plot(samhist97bs, col="black", main="97% within-species OTU reads per sample", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)
plot(samhist100, col="black", main="100% OTU reads per sample", xlim=c(0, 6), las=1, mgp=c(2,0.5,0),
xlab="No. sequences (log10)", ylab="No. samples", cex.lab=0.75, cex.axis=0.75)
# Create stats table
knitr::kable(cbind(stats97, stats97bs, stats97bsp, stats100))
| 97% OTUs | 97% within-sample OTUs | 97% within-species OTUs | 100% OTUs | |
|---|---|---|---|---|
| Total count in OTU table | 1490760 | 1490720 | 1490757 | 1055405 |
| Number of OTUs | 130 | 179 | 124 | 41390 |
| Range of OTU counts | 2 - 742681 | 2 - 472753 | 2 - 566413 | 2 - 171257 |
| Number of singleton OTUs | 0 | 0 | 0 | 0 |
| Number of samples | 84 | 84 | 84 | 84 |
| Range of reads per sample | 3 - 169903 | 3 - 169901 | 3 - 169901 | 1 - 112784 |
| Arithmetic mean (±SD) reads per sample | 17747 ± 20969 | 17746 ± 20969 | 17747 ± 20969 | 12564 ± 14471 |
| Geometric mean (±SD) reads per sample | 9420 ± 5 | 9393 ± 5 | 9410 ± 5 | 6449 ± 6 |
Count data are transformed to both relative abundance (proportions) and square-root proportions for downstream statistical analyses.
# Convert to proportion (relative abundance)
phy97.f.p <- transform_sample_counts(phy97.f, function(x) x/sum(x))
phy97bs.f.p <- transform_sample_counts(phy97bs.f, function(x) x/sum(x))
phy97bsp.f.p <- transform_sample_counts(phy97bsp.f, function(x) x/sum(x))
phy100.f.p <- transform_sample_counts(phy100.f, function(x) x/sum(x))
# Apply transformation function
transform <- function(x) sqrt(x/sum(x)) # Set transformation function
phy97.f.t <- transform_sample_counts(phy97.f, transform) # Transform data
phy97bs.f.t <- transform_sample_counts(phy97bs.f, transform)
phy97bsp.f.t <- transform_sample_counts(phy97bsp.f, transform)
phy100.f.t <- transform_sample_counts(phy100.f, transform)
Here the composition of each sample is plotted as a horizontal bar, sorted by species and shore. Each segment of the bar represents an individual OTU and is colored by clade. This plot is a nice overview of the whole dataset but only provides coarse information at the clade level.
cladeAbund <- aggregate(data.frame(RelAbund=rowSums(otu_table(phy97bs.f.p))),
by=list(Clade=data.frame(tax_table(phy97bs.f.p))$Clade), FUN=sum)
cladeAbund$Prop <- prop.table(cladeAbund$RelAbund)
bars <- barplot(cladeAbund$Prop*100, col=taxcolors, space=0,
names.arg=cladeAbund$Clade, xlab=expression(paste(italic('Symbiodinium'), " Clade")),
ylab="Relative abundance (%)")
text(bars, cladeAbund$Prop*100+2, labels=round(cladeAbund$Prop*100, 1), xpd=T)
cladeAbund$Notus <- table(data.frame(tax_table(phy97bs.f.p))$Clade)
par(mfrow=c(1,1), mar=c(2, 1.5, 2, 5), lwd=0.1, cex=0.7)
# Plot composition of 97% within-sample OTUs colored by clade
composition(phy97bs.f.p, col=taxcolors[factor(data.frame(tax_table(phy97bs.f.p))[order(data.frame(tax_table(phy97bs.f.p))$Subtype), ]$Clade, levels=c("A","B","C","D","F","G"))], legend=T)
# Plot composition of 97% within-sample OTUs colored by OTU
#composition(phy97bs.f.p, col=rainbow(ntaxa(phy97bs.f.p)), legend=F)
For each coral species, barplots are presented showing the relative abundance of OTUs obtained by 100%, 97%, and 97%-within-sample clustering. OTUs comprising more than 4% of a sample are labeled with the unique OTU number and the Symbiodinium subtype and NCBI GenBank accession number of the closest BLAST hit for that OTU in the reference database. OTU numbers and barplot colors are NOT comparable across clustering methods.
Notice how 97% OTUs and 97% within-sample OTUs are NOT THE SAME! Specifically, notice how in the 97% OTU dataset, all samples except two are dominated by the same OTU, B1. In the 97% within-sample OTU dataset, those samples are dominated by three different OTUs – B1, [B1]^6, and [B1]^3. Look at the 100% OTU dataset to see the composition of unique sequence variants in these samples. Indeed, these samples are dominated by different sequence variants. However, in the 97% OTU approach, they have all been collapsed into the same OTU, while in the 97% within-sample clustering approach, samples with different dominant sequence variants are maintained as separate OTUs, even though these different sequence variants are more than 97% similar to each other. I suggest that the within-sample clustering approach is a better way of treating Symbiodinium ITS2 sequence data.
# Create subsetted phyloseq objects for Diploria strigosa
dstr97.f.p <- subset_samples(phy97.f.p, Species=="strigosa")
dstr97bs.f.p <- subset_samples(phy97bs.f.p, Species=="strigosa")
dstr97bsp.f.p <- subset_samples(phy97bsp.f.p, Species=="strigosa")
dstr100.f.p <- subset_samples(phy100.f.p, Species=="strigosa")
# Plot custom barplots for Diploria strigosa
par(mfrow=c(4,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(dstr97.f.p, main="97% OTUs")
otubarplot(dstr97bsp.f.p, main="97% within-species OTUs")
otubarplot(dstr97bs.f.p, main="97% within-sample OTUs")
otubarplot(dstr100.f.p, main="100% OTUs")
dstr.net <- makenet(dstr97bs.f.p, 0)
set.seed(54538)
plotnet(dstr.net)
White circles represent coral samples, colored circles represent Symbiodinium OTUs (97% within-sample). Thickness of lines corresponds to the relative abundance of an OTU within a sample.
# Create subsetted phyloseq objects for Porites furcata
pfur97.f.p <- subset_samples(phy97.f.p, Species=="furcata")
pfur97bsp.f.p <- subset_samples(phy97bsp.f.p, Species=="furcata")
pfur97bs.f.p <- subset_samples(phy97bs.f.p, Species=="furcata")
pfur100.f.p <- subset_samples(phy100.f.p, Species=="furcata")
# Plot custom barplots for Porites furcata
par(mfrow=c(4,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(pfur97.f.p, main="97% OTUs")
otubarplot(pfur97bsp.f.p, main="97% within-species OTUs")
otubarplot(pfur97bs.f.p, main="97% within-sample OTUs")
otubarplot(pfur100.f.p, main="100% OTUs")
# Network analysis
pfur.net <- makenet(pfur97bs.f.p, 0)
par(mar=c(0,0,0,0))
set.seed(54538)
plotnet(pfur.net)
White circles represent coral samples, colored circles represent Symbiodinium OTUs (97% within-sample). Thickness of lines corresponds to the relative abundance of an OTU within a sample.
# Create subsetted phyloseq objects for Orbicella annularis
oann97.f.p <- subset_samples(phy97.f.p, Species=="annularis")
oann97bsp.f.p <- subset_samples(phy97bsp.f.p, Species=="annularis")
oann97bs.f.p <- subset_samples(phy97bs.f.p, Species=="annularis")
oann100.f.p <- subset_samples(phy100.f.p, Species=="annularis")
# Plot custom barplots for Orbicella annularis
par(mfrow=c(4,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(oann97.f.p, main="97% OTUs")
otubarplot(oann97bsp.f.p, main="97% within-species OTUs")
otubarplot(oann97bs.f.p, main="97% within-sample OTUs")
otubarplot(oann100.f.p, main="100% OTUs")
# Network analysis
oann.net <- makenet(oann97bs.f.p, 0)
par(mar=c(0,0,0,0))
set.seed(54538)
plotnet(oann.net)
White circles represent coral samples, colored circles represent Symbiodinium OTUs (97% within-sample). Thickness of lines corresponds to the relative abundance of an OTU within a sample.
# Create subsetted phyloseq objects for Millepora alcicornis
malc97.f.p <- subset_samples(phy97.f.p, Species=="alcicornis")
malc97bsp.f.p <- subset_samples(phy97bsp.f.p, Species=="alcicornis")
malc97bs.f.p <- subset_samples(phy97bs.f.p, Species=="alcicornis")
malc100.f.p <- subset_samples(phy100.f.p, Species=="alcicornis")
# Plot custom barplots for Millepora alcicornis
par(mfrow=c(4,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(malc97.f.p, main="97% OTUs")
otubarplot(malc97bsp.f.p, main="97% within-sample OTUs")
otubarplot(malc97bs.f.p, main="97% within-sample OTUs")
otubarplot(malc100.f.p, main="100% OTUs")
# Network analysis
malc.net <- makenet(malc97bs.f.p, 0)
par(mar=c(0,0,0,0))
set.seed(54538)
plotnet(malc.net)
White circles represent coral samples, colored circles represent Symbiodinium OTUs (97% within-sample). Thickness of lines corresponds to the relative abundance of an OTU within a sample.
# Create subsetted phyloseq objects for Siderastrea siderea
ssid97.f.p <- subset_samples(phy97.f.p, Species=="siderea")
ssid97bsp.f.p <- subset_samples(phy97bsp.f.p, Species=="siderea")
ssid97bs.f.p <- subset_samples(phy97bs.f.p, Species=="siderea")
ssid100.f.p <- subset_samples(phy100.f.p, Species=="siderea")
# Plot custom barplots for Siderastrea siderea
par(mfrow=c(4,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(ssid97.f.p, main="97% OTUs")
otubarplot(ssid97bsp.f.p, main="97% within-species OTUs")
otubarplot(ssid97bs.f.p, main="97% within-sample OTUs")
otubarplot(ssid100.f.p, main="100% OTUs")
# Network analysis
ssid.net <- makenet(ssid97bs.f.p, 0)
par(mar=c(0,0,0,0))
set.seed(834)
plotnet(ssid.net)
White circles represent coral samples, colored circles represent Symbiodinium OTUs (97% within-sample). Thickness of lines corresponds to the relative abundance of an OTU within a sample.
# Create subsetted phyloseq objects for Favia fragum
ffra97.f.p <- subset_samples(phy97.f.p, Species=="fragum")
ffra97bsp.f.p <- subset_samples(phy97bsp.f.p, Species=="fragum")
ffra97bs.f.p <- subset_samples(phy97bs.f.p, Species=="fragum")
ffra100.f.p <- subset_samples(phy100.f.p, Species=="fragum")
# Plot custom barplots for Favia fragum
par(mfrow=c(4,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(ffra97.f.p, main="97% OTUs")
otubarplot(ffra97bsp.f.p, main="97% within-species OTUs")
otubarplot(ffra97bs.f.p, main="97% within-sample OTUs")
otubarplot(ffra100.f.p, main="100% OTUs")
# Network analysis
ffra.net <- makenet(ffra97bs.f.p, 0)
par(mar=c(0,0,0,0))
set.seed(54538)
plotnet(ffra.net)
White circles represent coral samples, colored circles represent Symbiodinium OTUs (97% within-sample). Thickness of lines corresponds to the relative abundance of an OTU within a sample.
# Create subsetted phyloseq objects for Siderastrea radians
srad97.f.p <- subset_samples(phy97.f.p, Species=="radians")
srad97bsp.f.p <- subset_samples(phy97bsp.f.p, Species=="radians")
srad97bs.f.p <- subset_samples(phy97bs.f.p, Species=="radians")
srad100.f.p <- subset_samples(phy100.f.p, Species=="radians")
# Plot custom barplots for Siderastrea radians
par(mfrow=c(4,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(srad97.f.p, main="97% OTUs")
otubarplot(srad97bsp.f.p, main="97% within-species OTUs")
otubarplot(srad97bs.f.p, main="97% within-sample OTUs")
otubarplot(srad100.f.p, main="100% OTUs")
# Network analysis
srad.net <- makenet(srad97bs.f.p, 0)
par(mar=c(0,0,0,0))
set.seed(54538)
plotnet(srad.net)
White circles represent coral samples, colored circles represent Symbiodinium OTUs (97% within-sample). Thickness of lines corresponds to the relative abundance of an OTU within a sample.
# Create subsetted phyloseq objects for Porites astreoides
past97.f.p <- subset_samples(phy97.f.p, Species=="astreoides")
past97bsp.f.p <- subset_samples(phy97bsp.f.p, Species=="astreoides")
past97bs.f.p <- subset_samples(phy97bs.f.p, Species=="astreoides")
past100.f.p <- subset_samples(phy100.f.p, Species=="astreoides")
# Plot custom barplots for Porites astreoides
par(mfrow=c(4,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(past97.f.p, main="97% OTUs")
otubarplot(past97bsp.f.p, main="97% within-species OTUs")
otubarplot(past97bs.f.p, main="97% within-sample OTUs")
otubarplot(past100.f.p, main="100% OTUs")
# Network analysis
past.net <- makenet(past97bs.f.p, 0)
par(mar=c(0,0,0,0))
set.seed(54538)
plotnet(past.net)
White circles represent coral samples, colored circles represent Symbiodinium OTUs (97% within-sample). Thickness of lines corresponds to the relative abundance of an OTU within a sample.
# Create subsetted phyloseq objects for Dendrogyra cylindrus
dcyl97.f.p <- subset_samples(phy97.f.p, Species=="cylindrus")
dcyl97bsp.f.p <- subset_samples(phy97bsp.f.p, Species=="cylindrus")
dcyl97bs.f.p <- subset_samples(phy97bs.f.p, Species=="cylindrus")
dcyl100.f.p <- subset_samples(phy100.f.p, Species=="cylindrus")
# Plot custom barplots for Dendrogyra cylindrus
par(mfrow=c(4,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(dcyl97.f.p, main="97% OTUs")
otubarplot(dcyl97bsp.f.p, main="97% within-species OTUs")
otubarplot(dcyl97bs.f.p, main="97% within-sample OTUs")
otubarplot(dcyl100.f.p, main="100% OTUs")
# Network analysis
dcyl.net <- makenet(dcyl97bs.f.p, 0)
par(mar=c(0,0,0,0))
set.seed(54538)
plotnet(dcyl.net)
White circles represent coral samples, colored circles represent Symbiodinium OTUs (97% within-sample). Thickness of lines corresponds to the relative abundance of an OTU within a sample.
# Create subsetted phyloseq objects for Montastraea cavernosa
mcav97.f.p <- subset_samples(phy97.f.p, Species=="cavernosa")
mcav97bsp.f.p <- subset_samples(phy97bsp.f.p, Species=="cavernosa")
mcav97bs.f.p <- subset_samples(phy97bs.f.p, Species=="cavernosa")
mcav100.f.p <- subset_samples(phy100.f.p, Species=="cavernosa")
# Plot custom barplots for Montastraea cavernosa
par(mfrow=c(4,1), mar=c(4,4,2,0), mgp=c(0,0.5,0), lwd=0.1)
otubarplot(mcav97.f.p, main="97% OTUs")
otubarplot(mcav97bsp.f.p, main="97% within-species OTUs")
otubarplot(mcav97bs.f.p, main="97% within-sample OTUs")
otubarplot(mcav100.f.p, main="100% OTUs")
# Network analysis
mcav.net <- makenet(mcav97bs.f.p, 0)
par(mar=c(0,0,0,0))
set.seed(54538)
plotnet(mcav.net)
White circles represent coral samples, colored circles represent Symbiodinium OTUs (97% within-sample). Thickness of lines corresponds to the relative abundance of an OTU within a sample.
In this network, each coral species is collapsed into a single node (white squares), and connected to every Symbiodinium OTU (colored circles) that comprises at least 1% relative abundance within at least one individual of that species. Thus, very low abundance OTUs are not represented. The thickness of connections between coral species and OTUs is scaled by the relative proportion of samples of that species in which that OTU was present at ≥1% relative abundance (i.e., the frequency of presence at ≥1%). The size of Symbiodinium OTU nodes in the network is scaled by the number of species in which they occur, and colored according to clade.
abunet <- sppnet(phy=phy97bs.f.p, plot=F,
fun=function(x) length(x[x>0.01])/length(x),
layout=layout.fruchterman.reingold)
par(mar=c(0,0,0,0))
V(abunet)$size <- ifelse(is.na(V(abunet)$Clade), 15, 10*sqrt(degree(abunet)))
set.seed(12374)
l <- layout.fruchterman.reingold(abunet)
l <- norm_coords(l, ymin=-1, ymax=1, xmin=-1, xmax=1)
plot(abunet, rescale=F, layout=l, edge.curved=0.1, vertex.label.cex=V(abunet)$size/20,
vertex.label.color="black")
# Create dominant symbionts network
domnet <- sppnet(phy=phy97bs.f.p, plot=F,
fun=function(x) length(x[x>0.5])/length(x),
layout=layout.fruchterman.reingold)
par(mar=c(0,0,0,0), lwd=1)
set.seed(12374)
l <- layout.fruchterman.reingold(domnet)
l <- norm_coords(l, ymin=-1, ymax=1, xmin=-1, xmax=1)
plot(domnet, rescale=F, layout=l, edge.curved=0.1, vertex.label.cex=V(domnet)$size/20,
vertex.label.color="black")
# Create background symbionts network
# filter out OTUs that only occurred in one sample
phy97bs.f.p2 <- filter_taxa(phy97bs.f.p, function(x) sum(x>0) > 1, prune=TRUE)
bkgnet <- sppnet(phy=phy97bs.f.p2, plot=F,
fun=function(x) any(x>0 & x<0.01),
layout=layout.fruchterman.reingold)
# Merge D nodes (assuming they are all D1a/trenchii)
# Get original node map (sequence starting at one to the total number of nodes)
nodemap <- seq(1:length(V(bkgnet)))
# Make a copy of the nodemap that will be modified
nodemapnew <- nodemap
# Find all nodes beginning with D, and give them the same node ID number
nodemapnew[grep("^D", V(bkgnet)$Subtype)] <- min(grep("^D", V(bkgnet)$Subtype))
# Replace the now missing values in the node map so node IDs remain sequential
while (max(setdiff(nodemap, nodemapnew)) < max(nodemap)) {
nodemapnew[which(nodemapnew==max(setdiff(nodemap, nodemapnew))+1)] <- min(setdiff(nodemap, nodemapnew))
}
# Contract the network using the new nodemap (nodes with same node ID are merged)
bkgnet2 <- contract(bkgnet, nodemapnew, vertex.attr.comb=list(size="sum", "last"))
# Simplify network so that multiple edges are combined
bkgnet2 <- simplify(bkgnet2, remove.multiple=T, edge.attr.comb=list(weight="mean", width="mean", "ignore"))
# remove symbionts that are only connected to one or two coral species
bkgnet3 <- delete.vertices(bkgnet2, degree(bkgnet2)<=2) # & V(net2)$type==2)
V(bkgnet3)$size <- ifelse(is.na(V(bkgnet3)$Clade), 15, 1.5*degree(bkgnet3)^1.5)
E(bkgnet3)$width <- E(bkgnet3)$weight * 2
# Plot background symbionts network
set.seed(42384)
l3 <- layout.fruchterman.reingold(bkgnet3)
l3 <- norm_coords(l3, ymin=-1, ymax=1, xmin=-1, xmax=1)
par(mar=c(0,0,0,0))
plot(bkgnet3, rescale=F, layout=l3*1, edge.curved=0.1, vertex.label.cex=V(bkgnet3)$size/20,
vertex.label.color="black")
points(0.9,0.9, pch=0, cex=4)
text(0.9,0.9, "O.ann.", cex=0.5)
betad97bs <- betad(phy97bs.f.t, group="Species")
with(betad97bs$sambdsumm.ord, {
plot(mean, type="n", main="97% within-sample OTUs",
ylim=c(0, 0.75), ylab="Distance to centroid", xaxt="n", xlab="")
arrows(1:10, mean - se, 1:10, mean + se, length=0.05, angle=90, code=3)
points(1:10, mean, cex=1, pch=21, bg="white")
text(1:10, par("usr")[3]-0.025, srt=45, adj=1, xpd=T, cex=0.75,
labels=levels(sam$Species)[order(betad97bs$sambdsumm$mean, decreasing=T)])
text(1:10, mean + se + 0.03, labels=betad97bs$saml$Letters[as.character(group)], cex=0.5)
})
betad100 <- betad(phy100.f.t, group="Species")
with(betad100$sambdsumm.ord, {
plot(mean, type="n", main="100% OTUs",
ylim=c(0, 0.75), ylab="Distance to centroid", xaxt="n", xlab="")
arrows(1:10, mean - se, 1:10, mean + se, length=0.05, angle=90, code=3)
points(1:10, mean, cex=1, pch=21, bg="white")
text(1:10, par("usr")[3]-0.025, srt=45, adj=1, xpd=T, cex=0.75,
labels=levels(sam$Species)[order(betad100$sambdsumm$mean, decreasing=T)])
text(1:10, mean + se + 0.03, labels=betad100$saml$Letters[as.character(group)], cex=0.5)
})
betad97 <- betad(phy97.f.t, group="Species")
# Figure: Distance to centroids
with(betad97$sambdsumm.ord, {
plot(mean, type="n", main="97% OTUs",
ylim=c(0, 0.75), ylab="Distance to centroid", xaxt="n", xlab="")
arrows(1:10, mean - se, 1:10, mean + se, length=0.05, angle=90, code=3)
points(1:10, mean, cex=1, pch=21, bg="white")
text(1:10, par("usr")[3]-0.025, srt=45, adj=1, xpd=T, cex=0.75,
labels=levels(sam$Species)[order(betad97$sambdsumm$mean, decreasing=T)])
text(1:10, mean + se + 0.03, labels=betad97$saml$Letters[as.character(group)], cex=0.5)
})
Whether Symbiodinium communities differ among species is evaluated using permutational analysis of variance (PERMANOVA).
# PERMANOVA for difference among species
sppdiffs <- function(phy) {
permanova.spp.t <- adonis(phyloseq::distance(phy, "bray") ~ Species,
data=as(sample_data(phy), "data.frame"), permutations=9999)
permanova.spp.t
# Pairwise PERMANOVA tests for differences between species
dmat <- as(phyloseq::distance(phy, "bray"), "matrix")
df <- as(sample_data(phy), "data.frame")
pairs <- data.frame(t(combn(levels(df$Species), 2)), stringsAsFactors=F)
p.pairs <- setNames(rep(NA, nrow(pairs)), with(pairs, paste(X1, X2, sep='-')))
for (i in 1:nrow(pairs)) {
dfs <- subset(df, Species %in% c(pairs[i,1], pairs[i,2]))
dmats <- dmat[rownames(dfs), rownames(dfs)]
set.seed(152470)
permanova <- adonis(dmats ~ Species, data=dfs, permutations=999)
p.pairs[i] <- permanova$aov.tab$`Pr(>F)`[1]
}
# Return letters signifying differences
return(multcompLetters(p.pairs))
}
sppdiffs97 <- sppdiffs(phy97.f.t)
sppdiffs97bs <- sppdiffs(phy97bs.f.t)
sppdiffs100 <- sppdiffs(phy100.f.t)
sppdiffs <- data.frame("97% within-sample OTUs"=sppdiffs97bs$Letters,
"100% OTUs"=sppdiffs100$Letters,
"97% OTUs"=sppdiffs97$Letters, check.names=F)
knitr::kable(sppdiffs, caption="Species not sharing a letter are significantly different (p < 0.05)")
| 97% within-sample OTUs | 100% OTUs | 97% OTUs | |
|---|---|---|---|
| alcicornis | ab | a | a |
| annularis | ab | b | bc |
| astreoides | c | c | d |
| cavernosa | d | d | e |
| cylindrus | a | e | f |
| fragum | ab | f | a |
| furcata | e | g | b |
| radians | f | h | g |
| siderea | d | d | g |
| strigosa | b | b | ac |
Whether Symbiodinium communities differ between north vs. south shores within species is tested here using PERMANOVA.
set.seed(43789)
set.seed(43789)
shorestats97bs <- perms(phy97bs.f.t, group="Species", trt="Shore")
## 'nperm' > set of all permutations; Resetting 'nperm'.
## 'nperm' > set of all permutations; Resetting 'nperm'.
## 'nperm' > set of all permutations; Resetting 'nperm'.
knitr::kable(shorestats97bs, digits=3, row.names=F,
caption="97% within-sample OTU data: differences within and between shore for each species")
| Species | n | overall | within | between | R2 | bd | p |
|---|---|---|---|---|---|---|---|
| alcicornis | 10 | 0.625 | 0.674 | 0.586 | 0.038 | 0.985 | 0.739 |
| annularis | 4 | 0.574 | 0.574 | NaN | NA | NA | NA |
| astreoides | 5 | 0.066 | 0.066 | NaN | NA | NA | NA |
| cavernosa | 7 | 0.011 | 0.011 | 0.010 | 0.214 | 0.774 | 0.343 |
| cylindrus | 8 | 0.104 | 0.104 | 0.104 | 0.190 | 0.947 | 0.161 |
| fragum | 8 | 0.271 | 0.327 | 0.223 | 0.086 | 0.485 | 0.804 |
| furcata | 9 | 0.712 | 0.559 | 0.835 | 0.360 | 0.809 | 0.056 |
| radians | 10 | 0.104 | 0.105 | 0.103 | 0.119 | 0.847 | 0.375 |
| siderea | 10 | 0.518 | 0.535 | 0.506 | 0.087 | 0.568 | 0.483 |
| strigosa | 9 | 0.813 | 0.755 | 0.859 | 0.229 | 0.230 | 0.071 |
set.seed(43789)
shorestats100 <- perms(phy100.f.t, group="Species", trt="Shore")
## 'nperm' > set of all permutations; Resetting 'nperm'.
## 'nperm' > set of all permutations; Resetting 'nperm'.
## 'nperm' > set of all permutations; Resetting 'nperm'.
knitr::kable(shorestats100, digits=3, row.names=F,
caption="100% OTU data: differences within and between shore for each species")
| Species | n | overall | within | between | R2 | bd | p |
|---|---|---|---|---|---|---|---|
| alcicornis | 10 | 0.641 | 0.647 | 0.636 | 0.095 | 0.573 | 0.540 |
| annularis | 4 | 0.729 | 0.729 | NaN | NA | NA | NA |
| astreoides | 5 | 0.406 | 0.406 | NaN | NA | NA | NA |
| cavernosa | 7 | 0.455 | 0.451 | 0.458 | 0.188 | 0.948 | 0.314 |
| cylindrus | 8 | 0.430 | 0.379 | 0.473 | 0.213 | 0.026 | 0.054 |
| fragum | 8 | 0.551 | 0.575 | 0.531 | 0.129 | 0.296 | 0.536 |
| furcata | 9 | 0.696 | 0.553 | 0.810 | 0.446 | 0.386 | 0.024 |
| radians | 10 | 0.397 | 0.404 | 0.391 | 0.074 | 0.499 | 0.937 |
| siderea | 10 | 0.670 | 0.679 | 0.662 | 0.097 | 0.446 | 0.503 |
| strigosa | 9 | 0.812 | 0.763 | 0.851 | 0.222 | 0.042 | 0.095 |
set.seed(43789)
shorestats97 <- perms(phy97.f.t, group="Species", trt="Shore")
## 'nperm' > set of all permutations; Resetting 'nperm'.
## 'nperm' > set of all permutations; Resetting 'nperm'.
## 'nperm' > set of all permutations; Resetting 'nperm'.
knitr::kable(shorestats97, digits=3, row.names=F,
caption="97% OTU data: differences within and between shore for each species")
| Species | n | overall | within | between | R2 | bd | p |
|---|---|---|---|---|---|---|---|
| alcicornis | 10 | 0.104 | 0.105 | 0.103 | 0.087 | 0.332 | 0.642 |
| annularis | 4 | 0.589 | 0.589 | NaN | NA | NA | NA |
| astreoides | 5 | 0.083 | 0.083 | NaN | NA | NA | NA |
| cavernosa | 7 | 0.107 | 0.103 | 0.109 | 0.263 | 0.129 | 0.114 |
| cylindrus | 8 | 0.111 | 0.088 | 0.130 | 0.302 | 0.124 | 0.018 |
| fragum | 8 | 0.216 | 0.257 | 0.180 | 0.079 | 0.419 | 1.000 |
| furcata | 9 | 0.538 | 0.374 | 0.669 | 0.532 | 0.195 | 0.064 |
| radians | 10 | 0.129 | 0.135 | 0.124 | 0.063 | 0.966 | 0.826 |
| siderea | 10 | 0.298 | 0.275 | 0.316 | 0.212 | 0.192 | 0.016 |
| strigosa | 9 | 0.408 | 0.397 | 0.416 | 0.217 | 0.233 | 0.335 |